t1 <- Sys.time()
EH23a <- read.csv("../FigureSideo/EH23a.softmasked_nuccomp.csv")
EH23a$chrom_num <- sub(".+chr", "", EH23a$Id)
EH23a$chrom_num[ EH23a$chrom_num == "X" ] <- 10
EH23a$chrom_num <- as.numeric(EH23a$chrom_num)
EH23a[1:3, ]
EH23b <- read.csv("../FigureSideo/EH23b.softmasked_nuccomp.csv")
EH23b$chrom_num <- sub(".+chr", "", EH23b$Id)
EH23b$chrom_num[ EH23b$chrom_num == "X" ] <- 10
EH23b$chrom_num <- as.numeric(EH23b$chrom_num)
EH23b[1:3, ]
#source("../FigureSideo/Rfunctions.R")
#EH23a_wins <- get_wins("../FigureSideo/")
#EH23b_wins <- get_wins("../FigureSideo/EH23b.softmasked_wins.csv")
syri <- read.table("/media/knausb/E737-9B48/knausb/mm2_maps/EH23a_EH23b/EH23a_EH23brcsyri.out",
sep = "\t", na.strings = "-")
names(syri) <- c("ref_chrom", "ref_start", "ref_end", "ref_seq", "query_seq", "query_chrom", "query_start", "query_end", "unique_ID", "parent_ID", "Annotation_Type", "Copy_Status")
#syri$ref_start[is.na(as.numeric(syri$ref_start))]
syri$ref_start <- as.numeric(syri$ref_start)
syri$ref_end <- as.numeric(syri$ref_end)
syri$query_start <- as.numeric(syri$query_start)
syri$query_end <- as.numeric(syri$query_end)
syri[1:3, ]
sort(table(syri$Annotation_Type), decreasing = TRUE)
syn <- syri[syri[, 11] == "SYN", ]
syn[1:3, ]
nrow(syn)
last_win <- 1
#coalesce <- 5e4
#coalesce <- 1e5
coalesce <- 4e5
#coalesce <- 1e6
for( j in 2:nrow(syn) ){
if( syn$ref_start[j] - syn$ref_end[last_win] < coalesce &
syn$query_start[j] - syn$query_end[last_win] < coalesce &
syn$ref_chrom[j] == syn$ref_chrom[last_win] ){
syn$ref_end[ last_win ] <- syn$ref_end[j]
syn$query_end[ last_win ] <- syn$query_end[j]
#syn$ref_end[j] <- NA
syn$unique_ID[j] <- NA
} else {
last_win <- j
}
}
#syn <- syn[ !is.na( syn$ref_end ), ]
syn <- syn[ !is.na( syn$unique_ID ), ]
syn[1:3, ]
nrow(syn)
min(syn$ref_end - syn$ref_start)
hist(syn$ref_end - syn$ref_start)
sort(syn$ref_end - syn$ref_start, decreasing = T)[1:10]
# names(syn) <- c("ref_chrom", "ref_start", "ref_end", "ref_seq", "query_seq", "query_chrom", "query_start", "query_end", "unique_ID", "parent_ID", "Annotation_Type", "Copy_Status")
# syn$ref_start <- as.numeric(syn$ref_start)
# syn$ref_end <- as.numeric(syn$ref_end)
# syn$query_start <- as.numeric(syn$query_start)
# syn$query_end <- as.numeric(syn$query_end)
syn[1:3, ]
inv <- syri[syri$Annotation_Type == "INV", ]
# tmp <- inv$query_start
# inv$query_start <- inv$query_end
# inv$query_end <- tmp
# inv[1:3, ]
#inv$query_end - inv$query_start
dup <- syri[syri$Annotation_Type == "DUP", ]
dup[1:3, ]
nrow(dup)
trans <- syri[syri$Annotation_Type == "TRANS", ]
trans[1:3, ]
nrow(trans)
# nrow(syn)
# x <- syn[1:1000, ]
# nsmooth <- 10
# min_len <- 1e5
#gribbon <- function(x, nsmooth = 50, min_len = 1e5){
gribbon <- function(
x,
nsmooth = 10,
min_len = 1e5,
coalesce = 0,
invert = FALSE){
x <- x[(x$ref_end - x$ref_start) >= min_len, ]
x <- x[(x$query_end - x$query_start) >= min_len, ]
if( nrow(x) <= 0 ){
return("No rows")
}
# if( coalesce > 0 ){
# last_win <- 1
# for( j in 2:nrow(x) ){
# if( x$ref_start[j] - x$ref_end[last_win] < coalesce &
# x$query_start[j] - x$query_end[last_win] < coalesce ){
# x$ref_end[ last_win ] <- x$ref_end[j]
# x$query_end[ last_win ] <- x$query_end[j]
# x$ref_end[j] <- NA
# } else {
# last_win <- j
# }
# }
# x <- x[ !is.na( x$ref_end ), ]
# }
if( invert ){
tmp <- x$query_start
x$query_start <- x$query_end
x$query_end <- tmp
}
x$ref_chrom_num <- sub(".+chr", "", x$ref_chrom)
x$ref_chrom_num[ x$ref_chrom_num == "X" ] <- 10
x$ref_chrom_num[ x$ref_chrom_num == "Y" ] <- 11
x$ref_chrom_num <- as.numeric(x$ref_chrom_num)
x$ref_chrom_num <- x$ref_chrom_num - 0.2
x$query_chrom_num <- sub(".+chr", "", x$query_chrom)
x$query_chrom_num[ x$query_chrom_num == "X" ] <- 10
x$query_chrom_num[ x$query_chrom_num == "Y" ] <- 11
x$query_chrom_num <- as.numeric(x$query_chrom_num)
x$query_chrom_num <- x$query_chrom_num + 0.2
# nrow(x)
my_x <- seq( from = -4, to = 4, length.out = nsmooth)
my_y <- 1/( 1 + exp(1)^-my_x)
my_x <- (my_x + 4)/8
# plot(my_x, my_y)
# my_x <- c(my_x, rev(my_x))
my_polys <- data.frame(
ID = rep(x$unique_ID, each = nsmooth * 2),
ref_chrom_num = rep(x$ref_chrom_num, each = nsmooth * 2),
# ref_start = rep(NA, each = nsmooth * 2),
# ref_end = rep(NA, each = nsmooth * 2),
query_chrom_num = rep(x$query_chrom_num, each = nsmooth * 2),
# query_start = rep(NA, each = nsmooth * 2),
# query_end = rep(NA, each = nsmooth * 2)
poly_xs = rep(NA, each = nsmooth * 2),
poly_ys = rep(NA, each = nsmooth * 2)
)
my_polys[1:3, ]
my_vs <- seq(1, nrow(my_polys) + 1, by = nsmooth * 2)
# for( i in seq(1, nrow(my_polys), by = nsmooth * 2) ){
for( i in 1:nrow(x) ){
my_index <- my_vs[i]:(my_vs[i+1] - 1)
xmins <- seq( x$ref_chrom_num[i], x$query_chrom_num[i], length.out = nsmooth)
# my_y * (x$query_end[i] - x$query_start[i]) + x$query_start[i]
ymins <- my_y * (x$query_start[i] - x$ref_start[i]) + x$ref_start[i]
ymaxs <- my_y * (x$query_end[i] - x$ref_end[i]) + x$ref_end[i]
my_polys$poly_xs[my_index] <- c(xmins, rev(xmins))
my_polys$poly_ys[my_index] <- c(ymins, rev(ymaxs))
#x$ref_start[ my_index ] <- seq(x$ref_chrom[i], x$query_chrom[i], length.out = nsmooth)
}
# my_polys[1:3, ]
return( my_polys )
}
#poly_coords <- gribbon(syn[1:1000, ])
#poly_coords <- gribbon(syn, min_len = 1e5)
#
nrow(syn)
## [1] 40
poly_coords <- gribbon(syn, min_len = 1e1, coalesce = 0)
#poly_coords <- gribbon(syn, min_len = 1e3)
# poly_coords <- gribbon(syn, nsmooth = 40, min_len = 1e3)
# poly_coords <- gribbon(syn, min_len = 1e1)
length(unique(poly_coords$ID))
## [1] 40
poly_coords_inv <- gribbon(inv, min_len = 1e4, invert = TRUE)
poly_coords_dup <- gribbon(dup, min_len = 1e4)
poly_coords_trans <- gribbon(trans, min_len = 1e4)
cmd <- "~/gits/hempy/bin/fast_win.py /media/knausb/Vining_lab/knausb/mm2_maps/EH23a_EH23b_v2/EH23b.rc458X.softmasked.fasta.gz --win_size 1000000"
#system(cmd)
# sampn <- "EH23b"
get_wins <- function( sampn ){
# sampd <- "../FigureSideo/EH23a.softmasked_wins.csv"
wins_dir <- "../FigureSideo/"
gffs_dir <- "/media/knausb/E737-9B48/releases/scaffolded/"
# sampn <- unlist(lapply(strsplit(sampd, split = "/"), function(x){x[length(x)]}))
# sampn <- unlist(lapply(strsplit(sampd, split = "//"), function(x){x[length(x)]}))
# sampn <- sub(".softmasked_wins.csv", "", sampn)
# sub(paste(sampn, ".softmasked_wins.csv", sep = ""), "", sampd)
if( sampn == "EH23b" ){
wins <- read.csv( "EH23b.rc458X.softmasked_wins.csv" )
wins$Id <- sub("EH23a", "EH23b", wins$Id)
} else {
wins <- read.csv( paste( wins_dir, sampn, ".softmasked_wins.csv", sep = "") )
}
# wins <- read.csv( paste( sampn, ".softmasked_wins.csv", sep = "") )
# wins <- read.csv( sampd )
# wins <- wins[ grep( paste(sampn, ".chr", sep = ""), wins$Id ), ]
wins$chrn <- sub(".+chr", "", wins$Id)
wins$chrn[ wins$chrn == "X" ] <- 10
wins$chrn[ wins$chrn == "Y" ] <- 11
wins$chrn <- as.numeric(wins$chrn)
#wins[1:3, ]
# Scale and center.
# Scaline of CG windows (chromosome width) is on a per chromosome basis.
# wins$CGs <- 0
wins$CGs <- wins$CG / wins$Win_length * 100
# wins$notCG <- (wins$Win_length / 1) - wins$CG
# wins$notCGs <- 0
for( j in unique(wins$Id) ){
wins$CGs[ wins$Id == j] <- wins$CGs[ wins$Id == j] - min(wins$CGs[ wins$Id == j], na.rm = TRUE)
wins$CGs[ wins$Id == j] <- wins$CGs[ wins$Id == j] / max(wins$CGs[ wins$Id == j], na.rm = TRUE)
#
# wins$CGs[ wins$Id == j] <- wins$CG[ wins$Id == j] - min(wins$CG[ wins$Id == j], na.rm = TRUE)
# wins$CGs[ wins$Id == j] <- wins$CGs[ wins$Id == j]/max(wins$CGs[ wins$Id == j], na.rm = TRUE)
#
# wins$notCGs[ wins$Id == j] <- wins$notCG[ wins$Id == j] - min(wins$notCG[ wins$Id == j], na.rm = TRUE)
# wins$notCGs[ wins$Id == j] <- wins$notCGs[ wins$Id == j]/max(wins$notCGs[ wins$Id == j], na.rm = TRUE)
}
wins$iCGs <- 1 - wins$CGs
#wins$ATs <- 1 - wins$CGs
#wins$ATs <- wins$Win_length/1e6 - wins$CGs
# genes <- read.table(
# paste(sampd, "/", sampn, ".primary_high_confidence.gff3.gz", sep = "" ),
# sep = "\t" )
genes <- read.table(
paste(gffs_dir, sampn, "/", sampn, ".primary_high_confidence.gff3.gz", sep = "" ),
sep = "\t" )
genes <- genes[genes[, 3] == "gene", ]
if( sampn == "EH23b" ){
# Column 4 is starts.
genes[ genes[, 1] == "EH23b.chr4", 4] <- EH23b$Length[ EH23b$Id == "EH23b.chr4" ] - genes[ genes[, 1] == "EH23b.chr4", 4]
genes[ genes[, 1] == "EH23b.chr4", 5] <- EH23b$Length[ EH23b$Id == "EH23b.chr4" ] - genes[ genes[, 1] == "EH23b.chr4", 5]
genes[ genes[, 1] == "EH23b.chr5", 4] <- EH23b$Length[ EH23b$Id == "EH23b.chr5" ] - genes[ genes[, 1] == "EH23b.chr5", 4]
genes[ genes[, 1] == "EH23b.chr5", 5] <- EH23b$Length[ EH23b$Id == "EH23b.chr5" ] - genes[ genes[, 1] == "EH23b.chr5", 5]
genes[ genes[, 1] == "EH23b.chr8", 4] <- EH23b$Length[ EH23b$Id == "EH23b.chr8" ] - genes[ genes[, 1] == "EH23b.chr8", 4]
genes[ genes[, 1] == "EH23b.chr8", 5] <- EH23b$Length[ EH23b$Id == "EH23b.chr8" ] - genes[ genes[, 1] == "EH23b.chr8", 5]
genes[ genes[, 1] == "EH23b.chrX", 4] <- EH23b$Length[ EH23b$Id == "EH23b.chrX" ] - genes[ genes[, 1] == "EH23b.chrX", 4]
genes[ genes[, 1] == "EH23b.chrX", 5] <- EH23b$Length[ EH23b$Id == "EH23b.chrX" ] - genes[ genes[, 1] == "EH23b.chrX", 5]
}
genes[1:3, ]
# Windowize
wins$gcnt <- 0
for(i in 1:nrow(wins)){
# if( sampn == "EH23b" ){
# wins$Id <- sub("EH23a", "EH23b", wins$Id)
# }
tmp <- genes[genes$V1 == wins$Id[i] & genes$V4 >= wins$Start[i] & genes$V5 < wins$End[i], ]
wins$gcnt[i] <- nrow(tmp)
}
# Scaling of gene count windows is on a per assembly basis.
wins$gcntsc <- wins$gcnt - min(wins$gcnt)
wins$gcntsc <- wins$gcntsc / max(wins$gcntsc)
my_index <- round( wins$gcntsc * 100 )
my_index[ my_index <= 0] <- 1
# Color ramp.
#wins$gcol <- heat.colors(n=100)[ my_index ]
#wins$gcol <- colorRampPalette(c("yellow", "orange", "red"))( 100 )[ my_index ]
#wins$gcol <- colorRampPalette(c("red", "orange", "yellow"))( 100 )[ my_index ]
#wins$gcol <- colorRampPalette(c("#0000FF", "#228B22", "#A0522D"))( 100 )[ my_index ]
#wins$gcol <- colorRampPalette(c("#87CEEB", "#3CB371", "#228B22", "#A0522D"))( 100 )[ my_index ]
#wins$gcol <- viridisLite::plasma(n = 100, alpha = 1, begin = 0.1, end = 1)[ my_index ]
wins$gcol <- viridisLite::magma(n = 100, alpha = 1, begin = 0.2, end = 1.00)[ my_index ]
return(wins)
}
EH23a_wins <- get_wins("EH23a")
EH23a_wins[1:3, ]
EH23b_wins <- get_wins("EH23b")
EH23b_wins[1:3, ]
Windowize SNPs
EH23a_wins[1:3, ]
## Id Win_number Start End Win_length A C G T
## 1 EH23a.chr1 0 0 999999 1000000 339510 159349 157789 343352
## 2 EH23a.chr1 1 1000000 1999999 1000000 346184 152100 152139 349577
## 3 EH23a.chr1 2 2000000 2999999 1000000 343278 155236 154692 346794
## CG CHG CHH chrn CGs iCGs gcnt gcntsc gcol
## 1 14003 19695 90544 1 0.07933312 0.9206669 156 1.0000000 #FCFDBFFF
## 2 13018 18441 88693 1 0.00000000 1.0000000 141 0.9038462 #FED799FF
## 3 14100 19606 89229 1 0.08714562 0.9128544 123 0.7884615 #FEAD77FF
snps <- syri[syri$Annotation_Type == "SNP", ]
nrow(snps)
## [1] 3660371
snps[1:3, ]
## ref_chrom ref_start ref_end ref_seq query_seq query_chrom query_start
## 5 EH23a.chr1 6376 6376 c t EH23a.chr1 43110
## 8 EH23a.chr1 6542 6542 A G EH23a.chr1 43276
## 11 EH23a.chr1 6779 6779 t c EH23a.chr1 43514
## query_end unique_ID parent_ID Annotation_Type Copy_Status
## 5 43110 SNP21930 SYN1 SNP <NA>
## 8 43276 SNP21933 SYN1 SNP <NA>
## 11 43514 SNP21936 SYN1 SNP <NA>
EH23a_wins$snps <- 0
for(i in 1:nrow(EH23a_wins)){
tmp <- snps[ snps$ref_chrom == EH23a_wins$Id[i] & snps$ref_start >= EH23a_wins$Start[i] & snps$ref_end <= EH23a_wins$End[i], ]
EH23a_wins$snps[i] <- nrow(tmp)
}
EH23a_wins$snpssc <- (EH23a_wins$snps - min(EH23a_wins$snps))/max(EH23a_wins$snps)
hist(EH23a_wins$snps)
hist(EH23a_wins$snps[ EH23a_wins$Id == "EH23a.chr4" ])
my_hist <- hist(EH23a_wins$snpssc * 9 + 1, plot = F)
#barplot(my_hist$density, space = 0, col = gray.colors(n=10))
#axis(side=1)
barplot(my_hist$density ~ my_hist$breaks[-10], space = 0, col = gray.colors(n=10))
sampn <- "EH23a"
ablstn <- read.csv(
paste("/media/knausb/Vining_lab/knausb/blast_projects/todd_rrna/blastn_uc/", sampn, "_blastn.csv", sep = ""),
header = FALSE)
colnames(ablstn) <- c("qseqid","qlen","sseqid","slen","qstart","qend",
"sstart","send","evalue","bitscore","score","length",
"pident","nident","mismatch","positive","gapopen",
"gaps","ppos","sstrand")
ablstn <- ablstn[grep("chr", ablstn$sseqid), ]
ablstn$chrn <- sub(".*chr", "", ablstn$sseqid)
ablstn$chrn[ablstn$chrn == "X"] <- 10
ablstn$chrn[ablstn$chrn == "Y"] <- 11
ablstn$chrn <- as.numeric(ablstn$chrn)
sampn <- "EH23b"
bblstn <- read.csv(
paste("/media/knausb/Vining_lab/knausb/blast_projects/todd_rrna/blastn_uc/", sampn, "_blastn.csv", sep = ""),
header = FALSE)
colnames(bblstn) <- c("qseqid","qlen","sseqid","slen","qstart","qend",
"sstart","send","evalue","bitscore","score","length",
"pident","nident","mismatch","positive","gapopen",
"gaps","ppos","sstrand")
bblstn <- bblstn[grep("chr", bblstn$sseqid), ]
bblstn$chrn <- sub(".*chr", "", bblstn$sseqid)
bblstn$chrn[bblstn$chrn == "X"] <- 10
bblstn$chrn[bblstn$chrn == "Y"] <- 11
bblstn$chrn <- as.numeric(bblstn$chrn)
my_chrom <- "EH23b.chr4"
bblstn$sstart[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$sstart[ bblstn$sseqid == my_chrom ]
bblstn$send[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$send[ bblstn$sseqid == my_chrom ]
my_chrom <- "EH23b.chr5"
bblstn$sstart[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$sstart[ bblstn$sseqid == my_chrom ]
bblstn$send[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$send[ bblstn$sseqid == my_chrom ]
my_chrom <- "EH23b.chr8"
bblstn$sstart[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$sstart[ bblstn$sseqid == my_chrom ]
bblstn$send[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$send[ bblstn$sseqid == my_chrom ]
my_chrom <- "EH23b.chrX"
bblstn$sstart[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$sstart[ bblstn$sseqid == my_chrom ]
bblstn$send[ bblstn$sseqid == my_chrom ] <- EH23b$Length[ EH23b$Id == my_chrom ] - bblstn$send[ bblstn$sseqid == my_chrom ]
sampn <- "EH23b"
library(ggplot2)
chrom_wid <- 0.05
p <- ggplot2::ggplot()
p <- p + theme_bw()
p <- p + ggplot2::geom_rect(
data = EH23a,
ggplot2::aes( xmin = chrom_num - chrom_wid - 0.2,
xmax = chrom_num + chrom_wid - 0.2,
ymin = 1, ymax = Length),
fill = "#DCDCDC",
color = "#000000"
)
p <- p + ggplot2::geom_rect(
data = EH23b,
ggplot2::aes( xmin = chrom_num - chrom_wid + 0.2,
xmax = chrom_num + chrom_wid + 0.2,
ymin = 1, ymax = Length),
fill = "#DCDCDC",
color = "#000000"
)
# EH23a_wins[1:3, ]
my_cols <- gray.colors(n=10, start = 0.3, end = 0.9)[round(EH23a_wins$snpssc * 9 + 1)]
#my_cols <- gray.colors(n=100, start = 0.3, end = 0.9)[round(EH23a_wins$snpssc * 99 + 1)]
#p <-
p + ggplot2::geom_rect(
data = EH23a_wins,
ggplot2::aes( xmin = chrn - 0.5,
xmax = chrn - 0.25,
ymin = Start, ymax = End),
# fill = "#DCDCDC",
fill = my_cols,
# color = "#000000"
)
# Theme
p <- p + ggplot2::scale_x_continuous(
breaks = EH23a$chrom_num,
# limits = c(0.6, 10.4),
# limits = c(0.5, 10.5),
# labels = EH23a$Id
labels = sub("a.chr", ".chr", EH23a$Id)
)
p <- p + scale_y_continuous(
breaks = seq( 0, 120e6, by = 10e6),
labels = seq( 0, 120, by = 10)
)
# p <- p + ggplot2::theme_bw() +
p <- p + ggplot2::theme(
# panel.grid.minor.x = ggplot2::element_blank(),
panel.grid.minor.x = ggplot2::element_line( linewidth = 0.4, color = "#C0C0C0", linetype = 3 ),
axis.text.x = element_text(angle = 60, hjust = 1),
axis.title.x=element_blank(),
panel.grid.major.y = ggplot2::element_line( linewidth = 0.4, color = "#C0C0C0", linetype = 1 ),
panel.grid.minor.y = ggplot2::element_line( linewidth = 0.4, color = "#C0C0C0", linetype = 3 )
)
p <- p + ggplot2::ylab("Position (Mbp)")
p <- p + ggtitle( "EH23" )
p
#p <- p + theme(legend.position='none')
p <- p + geom_polygon(
data = poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID), #, fill = col),
alpha = 2/5,
show.legend = FALSE,
#color = NA,
# color = "#80808022",
# color = "#000000",
color = "#696969",
# color = "#808080",
# color = "#A9A9A9",
# color = "#C0C0C0",
linewidth = 0.4,
# fill = ID
# fill = "#C0C0C044"
fill = "#808080"
)
p
p <- p + geom_polygon(
data = poly_coords_inv,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 0.9,
show.legend = FALSE,
#color = NA,
# color = "#80808022",
#color = "#000000",
# linewidth = 0.1,
# fill = ID
# fill = "#C0C0C044"
fill = "#FFA500"
# fill = "#808080"
)
p
# p + geom_polygon(
# data = poly_coords_dup,
# aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
# alpha = 0.9,
# show.legend = FALSE,
# fill = "#1E90FF"
# )
# p + geom_polygon(
# data = poly_coords_trans,
# aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
# alpha = 0.9,
# show.legend = FALSE,
# fill = "#1E90FF"
# )
thinw <- 0.28
p <- p + ggplot2::geom_rect(
data = EH23a_wins,
ggplot2::aes(
xmin = chrn - iCGs * thinw - 0.2,
#xmax = chrn + iCGs * thinw,
xmax = chrn - 0.2,
ymin = Start,
ymax = End),
fill = EH23a_wins$gcol,
color = NA
)
p <- p + ggplot2::geom_rect(
data = EH23b_wins,
ggplot2::aes(
#xmin = chrn - iCGs * thinw - 0.2,
xmin = chrn + 0.2,
xmax = chrn + iCGs * thinw + 0.2,
#xmax = chrn - 0.2,
ymin = Start,
ymax = End),
fill = EH23b_wins$gcol,
color = NA
)
p
#p + xlim(3.5, 4.5) #+ ylim(1e6, 2e6)
p + xlim(5.5, 8.5) + ylim(10e6, 40e6)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Warning: Removed 10 rows containing missing values (`geom_rect()`).
## Removed 10 rows containing missing values (`geom_rect()`).
## Warning: Removed 654 rows containing missing values (`geom_rect()`).
## Warning: Removed 653 rows containing missing values (`geom_rect()`).
#pEH23 <- p
#save(pEH23, file = "ideo.RData")
#load("ideo.RData")
#pEH23
# Cannabinoids
my_rows <- grep("AB2|LY|NC_044376.1:c427", ablstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = ablstn[my_rows, ],
ggplot2::aes(
xmin = chrn - blstw,
xmax = chrn + 0 - 0.14,
ymin = sstart,
ymax = send),
fill = "#228B22",
color = '#228B22'
)
my_rows <- grep("AB2|LY|NC_044376.1:c427", bblstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = bblstn[my_rows, ],
ggplot2::aes(
xmin = chrn + 0.14,
xmax = chrn + blstw,
ymin = sstart,
ymax = send),
fill = "#228B22",
color = '#228B22'
)
# 5S
my_rows <- grep("5S_", ablstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = ablstn[my_rows, ],
ggplot2::aes(
xmin = chrn - blstw,
xmax = chrn - 0.1,
ymin = sstart,
ymax = send),
fill = "#000000",
color = '#000000'
)
my_rows <- grep("5S_", bblstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = bblstn[my_rows, ],
ggplot2::aes(
xmin = chrn + 0.1,
xmax = chrn + blstw,
ymin = sstart,
ymax = send),
fill = "#000000",
color = '#000000'
)
# 45S
my_rows <- grep("45s_|26s_|5.8s_|18s_", ablstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = ablstn[my_rows, ],
ggplot2::aes(
xmin = chrn - blstw,
xmax = chrn - 0.1,
ymin = sstart,
ymax = send),
fill = "#B22222",
linewidth = 2,
# linewidth = 4,
color = '#B22222'
)
my_rows <- grep("45s_|26s_|5.8s_|18s_", bblstn$qseqid)
blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = bblstn[my_rows, ],
ggplot2::aes(
xmin = chrn + 0.1,
xmax = chrn + blstw,
ymin = sstart,
ymax = send),
fill = "#B22222",
linewidth = 2,
# linewidth = 4,
color = '#B22222'
)
# Cent, subteol
my_rows <- grep("AH3Ma_CS-237_satelite|CsatSD_centromere_237bp|CsatSD_centromere_370bp", ablstn$qseqid)
# blstw <- 0.48
p <- p + ggplot2::geom_rect(
data = ablstn[my_rows, ],
ggplot2::aes(
xmin = chrn - 0.5,
xmax = chrn - 0.2,
ymin = sstart,
ymax = send),
fill = "#1E90FF",
color = '#1E90FF'
)
my_rows <- grep("AH3Ma_CS-237_satelite|CsatSD_centromere_237bp|CsatSD_centromere_370bp", bblstn$qseqid)
p <- p + ggplot2::geom_rect(
data = bblstn[my_rows, ],
ggplot2::aes(
xmin = chrn + 0.2,
xmax = chrn + 0.5,
ymin = sstart,
ymax = send),
fill = "#1E90FF",
color = '#1E90FF'
)
pEH23 <- p
pEH23
#list.files("/media/knausb/Vining_lab/knausb/mm2_maps/salk_chrY/orientation2/")
# ape::read.FASTA("/media/knausb/Vining_lab/knausb/mm2_maps/salk_chrY/orientation2/EH23a.chrXrevcomp.fasta")
# ape::read.FASTA("/media/knausb/Vining_lab/knausb/mm2_maps/salk_chrY/orientation2/AH3Ma.chrX.fasta")
# ape::read.FASTA("/media/knausb/Vining_lab/knausb/mm2_maps/salk_chrY/orientation2/AH3Mb.chrYrevcomp.fasta")
# ape::read.FASTA("/media/knausb/Vining_lab/knausb/mm2_maps/salk_chrY/orientation2/BCMb.chrYrevcomp.fasta")
chrom_df <- data.frame(
Id = c('EH23a.chrXrevcomp', 'AH3Ma.chrX', 'AH3Mb.chrYrevcomp', 'BCMb.chrYrevcomp'),
Length = c(84640807, 84231629, 110682302, 107756508),
Pos = c(1, 2, 3, 4))
EH23a.chrX_AH3Ma_syri <- read.table("/media/knausb/Vining_lab/knausb/mm2_maps/salk_chrY/orientation2/EH23a.chrXrevcomp_AH3Ma.chrXsyri.out",
sep = "\t", na.strings = "-")
names(EH23a.chrX_AH3Ma_syri) <- c("ref_chrom", "ref_start", "ref_end", "ref_seq", "query_seq", "query_chrom", "query_start", "query_end", "unique_ID", "parent_ID", "Annotation_Type", "Copy_Status")
#syri$ref_start[is.na(as.numeric(syri$ref_start))]
EH23a.chrX_AH3Ma_syri$ref_chrom <- paste("EH23a.", EH23a.chrX_AH3Ma_syri$ref_chrom, sep="")
EH23a.chrX_AH3Ma_syri$ref_start <- as.numeric(EH23a.chrX_AH3Ma_syri$ref_start)
EH23a.chrX_AH3Ma_syri$ref_end <- as.numeric(EH23a.chrX_AH3Ma_syri$ref_end)
EH23a.chrX_AH3Ma_syri$query_chrom <- paste("AH3Ma.", EH23a.chrX_AH3Ma_syri$query_chrom, sep="")
EH23a.chrX_AH3Ma_syri$query_start <- as.numeric(EH23a.chrX_AH3Ma_syri$query_start)
EH23a.chrX_AH3Ma_syri$query_end <- as.numeric(EH23a.chrX_AH3Ma_syri$query_end)
syri <- read.table("/media/knausb/Vining_lab/knausb/mm2_maps/salk_chrY/orientation2/AH3Ma.chrX_AH3Mb.chrYrevcompsyri.out",
sep = "\t", na.strings = "-")
names(syri) <- c("ref_chrom", "ref_start", "ref_end", "ref_seq", "query_seq", "query_chrom", "query_start", "query_end", "unique_ID", "parent_ID", "Annotation_Type", "Copy_Status")
#syri$ref_start[is.na(as.numeric(syri$ref_start))]
syri$ref_chrom <- paste("AH3Ma.", syri$ref_chrom, sep="")
syri$ref_start <- as.numeric(syri$ref_start)
syri$ref_end <- as.numeric(syri$ref_end)
syri$query_chrom <- paste("AH3Mb.", syri$query_chrom, sep="")
syri$query_start <- as.numeric(syri$query_start)
syri$query_end <- as.numeric(syri$query_end)
AH3Ma.chrX_AH3Mb.chrY_syri <- syri
syri <- read.table("/media/knausb/Vining_lab/knausb/mm2_maps/salk_chrY/orientation2/AH3Mb.chrYrevcomp_BCMb.chrYrevcompsyri.out",
sep = "\t", na.strings = "-")
names(syri) <- c("ref_chrom", "ref_start", "ref_end", "ref_seq", "query_seq", "query_chrom", "query_start", "query_end", "unique_ID", "parent_ID", "Annotation_Type", "Copy_Status")
#syri$ref_start[is.na(as.numeric(syri$ref_start))]
syri$ref_chrom <- paste("AH3Mb.", syri$ref_chrom, sep="")
syri$ref_start <- as.numeric(syri$ref_start)
syri$ref_end <- as.numeric(syri$ref_end)
syri$query_chrom <- paste("BCMb.", syri$query_chrom, sep="")
syri$query_start <- as.numeric(syri$query_start)
syri$query_end <- as.numeric(syri$query_end)
AH3Mb.chrYrevcomp_BCMb.chrY_syri <- syri
coalesce_rows <- function(syn){
last_win <- 1
coalesce <- 4e5
for( j in 2:nrow(syn) ){
if( syn$ref_start[j] - syn$ref_end[last_win] < coalesce &
syn$query_start[j] - syn$query_end[last_win] < coalesce &
syn$ref_chrom[j] == syn$ref_chrom[last_win] ){
syn$ref_end[ last_win ] <- syn$ref_end[j]
syn$query_end[ last_win ] <- syn$query_end[j]
#syn$ref_end[j] <- NA
syn$unique_ID[j] <- NA
} else {
last_win <- j
}
}
#syn <- syn[ !is.na( syn$ref_end ), ]
syn <- syn[ !is.na( syn$unique_ID ), ]
return(syn)
}
table(EH23a.chrX_AH3Ma_syri$Annotation_Type)
##
## CPG CPL DEL DUP DUPAL HDR INS INV INVAL INVDP
## 56 173 30806 900 1251 2259 30325 14 142 823
## INVDPAL INVTR INVTRAL NOTAL SNP SYN SYNAL TDM TRANS TRANSAL
## 1156 423 591 5627 331654 1687 3873 12 421 592
EH23a.chrX_AH3Ma_syn <- EH23a.chrX_AH3Ma_syri[EH23a.chrX_AH3Ma_syri$Annotation_Type == "SYN", ]
EH23a.chrX_AH3Ma_syn <- coalesce_rows(EH23a.chrX_AH3Ma_syn)
#EH23a.chrX_AH3Ma_syn[1:3, ]
EH23a.chrX_AH3Ma_syn_poly_coords <- gribbon(EH23a.chrX_AH3Ma_syn, min_len = 1e1, coalesce = 0)
EH23a.chrX_AH3Ma_syn_poly_coords$poly_xs <- (EH23a.chrX_AH3Ma_syn_poly_coords$poly_xs - 9.8)/0.4 + 1
#EH23a.chrX_AH3Ma_syn_poly_coords[1:3, ]
AH3Ma_AH3Mb_syn <- AH3Ma.chrX_AH3Mb.chrY_syri[AH3Ma.chrX_AH3Mb.chrY_syri$Annotation_Type == "SYN", ]
AH3Ma_AH3Mb_syn <- coalesce_rows(AH3Ma_AH3Mb_syn)
AH3Ma_AH3Mb_syn_poly_coords <- gribbon(AH3Ma_AH3Mb_syn, min_len = 1e1, coalesce = 0)
AH3Ma_AH3Mb_syn_poly_coords$poly_xs <- (AH3Ma_AH3Mb_syn_poly_coords$poly_xs - 9.8)/0.4 + 1
AH3Ma_AH3Mb_syn_poly_coords$poly_xs <- AH3Ma_AH3Mb_syn_poly_coords$poly_xs + 1
AH3Mb_BCMb_syn <- AH3Mb.chrYrevcomp_BCMb.chrY_syri[AH3Mb.chrYrevcomp_BCMb.chrY_syri$Annotation_Type == "SYN", ]
AH3Mb_BCMb_syn <- coalesce_rows(AH3Mb_BCMb_syn)
AH3Mb_BCMb_syn_poly_coords <- gribbon(AH3Mb_BCMb_syn, min_len = 1e1, coalesce = 0)
AH3Mb_BCMb_syn_poly_coords$poly_xs <- (AH3Mb_BCMb_syn_poly_coords$poly_xs - 9.8)/0.4 + 1
AH3Mb_BCMb_syn_poly_coords$poly_xs <- AH3Mb_BCMb_syn_poly_coords$poly_xs + 2
EH23a.chrX_AH3Ma_inv <- EH23a.chrX_AH3Ma_syri[EH23a.chrX_AH3Ma_syri$Annotation_Type == "INV", ]
#EH23a.chrX_AH3Ma_inv <- coalesce_rows(EH23a.chrX_AH3Ma_inv)
#EH23a.chrX_AH3Ma_syn[1:3, ]
EH23a.chrX_AH3Ma_inv_poly_coords <- gribbon(EH23a.chrX_AH3Ma_inv, min_len = 1e1, coalesce = 0, invert = TRUE)
EH23a.chrX_AH3Ma_inv_poly_coords$poly_xs <- (EH23a.chrX_AH3Ma_inv_poly_coords$poly_xs - 9.8)/0.4 + 1
#EH23a.chrX_AH3Ma_syn_poly_coords[1:3, ]
AH3Ma_AH3Mb_inv <- AH3Ma.chrX_AH3Mb.chrY_syri[AH3Ma.chrX_AH3Mb.chrY_syri$Annotation_Type == "INV", ]
#AH3Ma_AH3Mb_inv <- coalesce_rows(AH3Ma_AH3Mb_inv)
AH3Ma_AH3Mb_inv_poly_coords <- gribbon(AH3Ma_AH3Mb_inv, min_len = 1e1, coalesce = 0, invert = TRUE)
AH3Ma_AH3Mb_inv_poly_coords$poly_xs <- (AH3Ma_AH3Mb_inv_poly_coords$poly_xs - 9.8)/0.4 + 1
AH3Ma_AH3Mb_inv_poly_coords$poly_xs <- AH3Ma_AH3Mb_inv_poly_coords$poly_xs + 1
AH3Mb_BCMb_inv <- AH3Mb.chrYrevcomp_BCMb.chrY_syri[AH3Mb.chrYrevcomp_BCMb.chrY_syri$Annotation_Type == "INV", ]
#AH3Mb_BCMb_inv <- coalesce_rows(AH3Mb_BCMb_inv)
AH3Mb_BCMb_inv_poly_coords <- gribbon(AH3Mb_BCMb_inv, min_len = 1e1, coalesce = 0, invert = TRUE)
AH3Mb_BCMb_inv_poly_coords$poly_xs <- (AH3Mb_BCMb_inv_poly_coords$poly_xs - 9.8)/0.4 + 1
AH3Mb_BCMb_inv_poly_coords$poly_xs <- AH3Mb_BCMb_inv_poly_coords$poly_xs + 2
dup <- EH23a.chrX_AH3Ma_syri[EH23a.chrX_AH3Ma_syri$Annotation_Type == "DUP", ]
dup_poly_coords <- gribbon(dup, min_len = 1e1, coalesce = 0, invert = FALSE)
dup_poly_coords$poly_xs <- (dup_poly_coords$poly_xs - 9.8)/0.4 + 1
dup_poly_coords$poly_xs <- dup_poly_coords$poly_xs + 0
EH23a.chrX_AH3Ma_dup_poly_coords <- dup_poly_coords
dup <- AH3Ma.chrX_AH3Mb.chrY_syri[AH3Ma.chrX_AH3Mb.chrY_syri$Annotation_Type == "DUP", ]
dup_poly_coords <- gribbon(dup, min_len = 1e1, coalesce = 0, invert = FALSE)
dup_poly_coords$poly_xs <- (dup_poly_coords$poly_xs - 9.8)/0.4 + 1
dup_poly_coords$poly_xs <- dup_poly_coords$poly_xs + 1
AH3Ma_AH3Mb_dup_poly_coords <- dup_poly_coords
dup <- AH3Mb.chrYrevcomp_BCMb.chrY_syri[AH3Mb.chrYrevcomp_BCMb.chrY_syri$Annotation_Type == "DUP", ]
dup_poly_coords <- gribbon(dup, min_len = 1e1, coalesce = 0, invert = FALSE)
dup_poly_coords$poly_xs <- (dup_poly_coords$poly_xs - 9.8)/0.4 + 1
dup_poly_coords$poly_xs <- dup_poly_coords$poly_xs + 2
AH3Mb_BCMb_dup_poly_coords <- dup_poly_coords
library(ggplot2)
chrom_wid <- 0.05
p <- ggplot2::ggplot()
p <- p + theme_bw()
p <- p + ggplot2::geom_rect(
data = chrom_df,
ggplot2::aes( xmin = Pos - chrom_wid,
xmax = Pos + chrom_wid,
ymin = 1, ymax = Length),
fill = "#DCDCDC",
color = "#000000"
)
# Theme
p <- p + ggplot2::scale_x_continuous(
labels = sub("\\.", "\n", sub("revcomp$", "", chrom_df$Id))
)
p <- p + scale_y_continuous(
breaks = seq( 0, 120e6, by = 20e6),
labels = seq( 0, 120, by = 20)
)
# p <- p + ggplot2::theme_bw() +
p <- p + ggplot2::theme(
# panel.grid.minor.x = ggplot2::element_blank(),
panel.grid.minor.x = ggplot2::element_line( linewidth = 0.4, color = "#C0C0C0", linetype = 3 ),
axis.text.x = element_text(angle = 60, hjust = 1),
axis.title.x=element_blank(),
panel.grid.major.y = ggplot2::element_line( linewidth = 0.4, color = "#C0C0C0", linetype = 1 ),
# panel.grid.minor.y = ggplot2::element_line( linewidth = 0.4, color = "#C0C0C0", linetype = 3 )
panel.grid.minor.y = ggplot2::element_blank()
)
p <- p + ggplot2::ylab("Position (Mbp)")
#p <- p + ggtitle( "Chromosomes X and Y" )
p
p <- p + geom_polygon(
data = EH23a.chrX_AH3Ma_syn_poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 2/5,
show.legend = FALSE,
# color = "#696969",
# linewidth = 0.4,
fill = "#808080"
)
p <- p + geom_polygon(
data = EH23a.chrX_AH3Ma_inv_poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 2/5,
show.legend = FALSE,
# color = "#696969",
# linewidth = 0.4,
color = "#FFA500",
fill = "#FFA500"
# fill = "#808080"
)
p <- p + geom_polygon(
data = EH23a.chrX_AH3Ma_dup_poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 2/5,
show.legend = FALSE,
color = "#1E90FF44",
# color = "#696969",
linewidth = 0.1,
fill = "#1E90FF"
# fill = "#808080"
)
p <- p + geom_polygon(
data = AH3Ma_AH3Mb_syn_poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 2/5,
show.legend = FALSE,
# color = "#696969",
# linewidth = 0.4,
fill = "#808080"
)
p <- p + geom_polygon(
data = AH3Ma_AH3Mb_inv_poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 2/5,
show.legend = FALSE,
# color = "#696969",
# linewidth = 0.4,
color = "#FFA500",
fill = "#FFA500"
# fill = "#808080"
)
p <- p + geom_polygon(
data = AH3Ma_AH3Mb_dup_poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 2/5,
show.legend = FALSE,
color = "#1E90FF44",
# color = "#696969",
linewidth = 0.1,
fill = "#1E90FF"
# fill = "#808080"
)
p <- p + geom_polygon(
data = AH3Mb_BCMb_syn_poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 2/5,
show.legend = FALSE,
# color = "#696969",
# linewidth = 0.4,
fill = "#808080"
)
p <- p + geom_polygon(
data = AH3Mb_BCMb_inv_poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 2/5,
show.legend = FALSE,
# color = "#696969",
# linewidth = 0.4,
color = "#FFA500",
fill = "#FFA500"
# fill = "#808080"
)
p <- p + geom_polygon(
data = AH3Mb_BCMb_dup_poly_coords,
aes(x = poly_xs, y = poly_ys, fill = ID, group = ID),
alpha = 2/5,
show.legend = FALSE,
color = "#1E90FF44",
# color = "#696969",
linewidth = 0.1,
fill = "#1E90FF"
# fill = "#808080"
)
#p
sex_chr <- p
sex_chr
my_data <- readr::read_csv("../Figure1b/gene_counts.csv", )
## Rows: 69 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Sample
## dbl (11): chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chrX, chrY
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# names(my_data)[1] <- "Sample"
# my_data$Sample <- as.factor( my_data$Sample )
my_data
## # A tibble: 69 × 12
## Sample chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chrX chrY
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AH3Ma 4052 2763 2586 3189 2424 2535 2058 2892 2610 3689 NA
## 2 AH3Mb 4062 2807 2620 3224 2563 2604 2080 2967 2509 NA 2850
## 3 BCMa 4137 2878 2682 3256 2597 2599 2031 2953 2654 3792 NA
## 4 BCMb 4059 2703 2611 3098 2601 2615 1986 2905 2569 NA 2751
## 5 BOAXa 4097 2877 2680 3147 2444 2718 2015 2950 2612 3778 NA
## 6 BOAXb 4039 2816 2507 3159 2499 2586 1995 2914 2551 3670 NA
## 7 COFBa 4365 2681 2552 3035 2404 2457 1793 2840 2483 3625 NA
## 8 COFBb 4078 2960 2755 3300 2768 2750 2212 3058 2769 3944 NA
## 9 COSVa 4063 2834 2677 3130 2595 2580 2048 2940 2594 3722 NA
## 10 COSVb 4049 2846 2690 3175 2601 2645 2029 2977 2566 3801 NA
## # ℹ 59 more rows
library(tidyr)
data_long <- my_data %>%
pivot_longer( cols = !Sample, names_to = "Chrom", values_to = "Count")
#data_long$Length <- data_long$Length / 1e6
gcount <- data_long
sum(is.na(data_long$Count))
## [1] 69
data_long <- data_long[!is.na(data_long$Count), ]
data_long
## # A tibble: 690 × 3
## Sample Chrom Count
## <chr> <chr> <dbl>
## 1 AH3Ma chr1 4052
## 2 AH3Ma chr2 2763
## 3 AH3Ma chr3 2586
## 4 AH3Ma chr4 3189
## 5 AH3Ma chr5 2424
## 6 AH3Ma chr6 2535
## 7 AH3Ma chr7 2058
## 8 AH3Ma chr8 2892
## 9 AH3Ma chr9 2610
## 10 AH3Ma chrX 3689
## # ℹ 680 more rows
my_data <- readr::read_csv("../Figure1b/chrom_lengths.csv")
## New names:
## Rows: 69 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (11): chr1, chr2, chr3, chr4, chr5, chr6, chr7, chr8, chr9, chrX,
## chrY
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
names(my_data)[1] <- "Sample"
my_data$Sample <- as.factor( my_data$Sample )
my_data
## # A tibble: 69 × 12
## Sample chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chrX
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AH3Ma 6.57e7 7.65e7 8.20e7 8.16e7 7.65e7 7.63e7 6.64e7 5.41e7 6.60e7 8.42e7
## 2 AH3Mb 6.63e7 7.62e7 8.11e7 8.09e7 7.80e7 7.64e7 6.65e7 5.52e7 6.50e7 NA
## 3 BCMa 6.51e7 7.58e7 8.62e7 8.12e7 7.67e7 7.53e7 6.55e7 5.48e7 6.49e7 8.33e7
## 4 BCMb 6.32e7 7.83e7 8.16e7 7.84e7 7.69e7 7.68e7 6.44e7 5.42e7 6.55e7 NA
## 5 BOAXa 6.53e7 7.96e7 8.19e7 7.80e7 7.64e7 7.91e7 6.40e7 5.48e7 6.53e7 8.32e7
## 6 BOAXb 6.50e7 7.76e7 7.95e7 7.97e7 7.57e7 7.58e7 6.49e7 5.44e7 6.60e7 8.26e7
## 7 COFBa 7.21e7 7.85e7 8.44e7 8.21e7 7.76e7 7.97e7 6.37e7 5.57e7 6.55e7 8.57e7
## 8 COFBb 6.47e7 7.71e7 8.21e7 8.16e7 7.77e7 7.71e7 6.84e7 5.52e7 6.63e7 8.46e7
## 9 COSVa 6.76e7 8.34e7 8.63e7 8.52e7 8.09e7 7.93e7 6.99e7 5.53e7 6.81e7 8.65e7
## 10 COSVb 6.61e7 8.15e7 8.74e7 8.40e7 8.46e7 8.30e7 6.50e7 5.75e7 6.78e7 8.58e7
## # ℹ 59 more rows
## # ℹ 1 more variable: chrY <dbl>
library(tidyr)
data_long <- my_data %>%
pivot_longer( cols = !Sample, names_to = "Chrom", values_to = "Length")
data_long$Length <- data_long$Length / 1e6
data_long
## # A tibble: 759 × 3
## Sample Chrom Length
## <fct> <chr> <dbl>
## 1 AH3Ma chr1 65.7
## 2 AH3Ma chr2 76.5
## 3 AH3Ma chr3 82.0
## 4 AH3Ma chr4 81.6
## 5 AH3Ma chr5 76.5
## 6 AH3Ma chr6 76.3
## 7 AH3Ma chr7 66.4
## 8 AH3Ma chr8 54.1
## 9 AH3Ma chr9 66.0
## 10 AH3Ma chrX 84.2
## # ℹ 749 more rows
glength <- data_long
glength
## # A tibble: 759 × 3
## Sample Chrom Length
## <fct> <chr> <dbl>
## 1 AH3Ma chr1 65.7
## 2 AH3Ma chr2 76.5
## 3 AH3Ma chr3 82.0
## 4 AH3Ma chr4 81.6
## 5 AH3Ma chr5 76.5
## 6 AH3Ma chr6 76.3
## 7 AH3Ma chr7 66.4
## 8 AH3Ma chr8 54.1
## 9 AH3Ma chr9 66.0
## 10 AH3Ma chrX 84.2
## # ℹ 749 more rows
gcount
## # A tibble: 759 × 3
## Sample Chrom Count
## <chr> <chr> <dbl>
## 1 AH3Ma chr1 4052
## 2 AH3Ma chr2 2763
## 3 AH3Ma chr3 2586
## 4 AH3Ma chr4 3189
## 5 AH3Ma chr5 2424
## 6 AH3Ma chr6 2535
## 7 AH3Ma chr7 2058
## 8 AH3Ma chr8 2892
## 9 AH3Ma chr9 2610
## 10 AH3Ma chrX 3689
## # ℹ 749 more rows
data_long <- cbind(glength, gcount$Count)
names(data_long)[4] <- "Count"
data_long[1:3, ]
## Sample Chrom Length Count
## 1 AH3Ma chr1 65.67137 4052
## 2 AH3Ma chr2 76.48150 2763
## 3 AH3Ma chr3 81.99513 2586
my_pal <- RColorBrewer::brewer.pal(n=12, name = "Paired")
#my_pal[11] <- "#B15928"
my_pal[11] <- "#C71585"
my_pal <- paste(my_pal, "88", sep = "")
library(ggplot2)
# Basic scatter plot
p <- ggplot(data_long, aes(x=Length, y=Count, color=Chrom))
p <- p + geom_point(size=2)
#p <- p + geom_smooth(method=lm)
p <- p + geom_smooth(method=lm, se=FALSE, linewidth = 1)
p <- p + theme_bw()
p <- p + theme(legend.title = element_blank())
#p <- p + theme(legend.position = "left")
p <- p + theme(legend.position = "right")
#p <- p + theme( legend.spacing.y = unit(1.0, 'mm') )
## important additional element
#p <- p + guides(fill = guide_legend(byrow = TRUE))
p + theme(legend.spacing.y = unit(1.0, 'mm')) +
guides(fill = guide_legend(byrow = TRUE))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 69 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 69 rows containing missing values (`geom_point()`).
#p <- p + scale_color_brewer(palette="Dark2")
#p <- p + scale_color_brewer(palette="Paired")
p <- p + scale_color_manual(values=my_pal)
#p <- p + scale_color_brewer(palette="Set3")
p <- p + xlab("Chromosome length (Mbp)")
p <- p + ylab("Genes per chromosome")
gchrom <- p
gchrom
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 69 rows containing non-finite values (`stat_smooth()`).
## Removed 69 rows containing missing values (`geom_point()`).
library("ggpubr")
ggarrange(
plotlist = list(pEH23,
ggarrange(sex_chr, gchrom,
ncol = 2, nrow = 1,
widths = c(2, 3),
labels = c("B", "C"))
),
labels = c("A", ""),
ncol = 1, nrow = 2,
widths = 1, heights = c(2, 1))
## Warning: Removed 69 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 69 rows containing missing values (`geom_point()`).
Figure 1. Genomic architecture of the Cannabis sativa genome.
afreq <- read.csv("/media/knausb/Vining_lab/knausb/GENOMES_NRQ_2267/ERBxHO40_hapERB/het_wins/allele_freqs.csv")
unique(afreq$CHROM)
## [1] "chr_1e" "chr_2e" "chr_3e" "chr_4e" "chr_5e" "chr_6e" "chr_7e" "chr_8e"
## [9] "chr_9e" "chr_Xe"
afreq[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1 count1.1
## 1 chr_1e 44392 267 463,71 0.2305615 1.299649 203 57 7
## 2 chr_1e 44437 266 287,245 0.4968837 1.987612 69 149 48
## 3 chr_1e 44763 266 270,262 0.4998869 1.999548 68 134 64
## countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 1 3 463 71 0.8670412 0.1329588 0.2134831 44392
## 2 4 287 245 0.5394737 0.4605263 0.5601504 44437
## 3 4 270 262 0.5075188 0.4924812 0.5037594 44763
tmp <- afreq
afreq <- tmp[tmp$CHROM == "chr_1e", ]
afreq <- rbind( afreq, tmp[tmp$CHROM == "chr_2e", ])
afreq <- rbind( afreq, tmp[tmp$CHROM == "chr_3e", ])
afreq <- rbind( afreq, tmp[tmp$CHROM == "chr_4e", ])
afreq <- rbind( afreq, tmp[tmp$CHROM == "chr_5e", ])
afreq <- rbind( afreq, tmp[tmp$CHROM == "chr_6e", ])
afreq <- rbind( afreq, tmp[tmp$CHROM == "chr_7e", ])
afreq <- rbind( afreq, tmp[tmp$CHROM == "chr_8e", ])
afreq <- rbind( afreq, tmp[tmp$CHROM == "chr_9e", ])
afreq <- rbind( afreq, tmp[tmp$CHROM == "chr_Xe", ])
rm(tmp)
#table(afreq$CHROM)
#hist(afreq$countNA)
#afreq <- afreq[afreq$countNA <= 2, ]
#
afreq <- afreq[afreq$countNA <= 0, ]
afreq[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1
## 13 chr_1e 62350 270 509,31 0.1082236 1.121357 240 29
## 30 chr_1e 98473 270 412,128 0.3617010 1.566664 145 122
## 54 chr_1e 204226 270 263,277 0.4996639 1.998657 65 133
## count1.1 countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 13 1 0 509 31 0.9425926 0.05740741 0.1074074 62350
## 30 3 0 412 128 0.7629630 0.23703704 0.4518519 98473
## 54 72 0 263 277 0.4870370 0.51296296 0.4925926 204226
Fis
#plot(2 * afreq$ERBfreq * afreq$HO40freq, y = afreq$He)
#plot(2 * afreq$ERBfreq * afreq$HO40freq, y = afreq$He2)
#hist(afreq$He)
Hs <- 2 * afreq$ERBfreq * afreq$HO40freq
Ho <- afreq$count0.1 / afreq$n
#afreq$Fis <- (Hs - afreq$He)/Hs
afreq$Fis <- (Hs - Ho)/Hs
# afreq$Fis <- (afreq$Fis + 1)/2 # Scale from 0 - 1.
afreq[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1
## 13 chr_1e 62350 270 509,31 0.1082236 1.121357 240 29
## 30 chr_1e 98473 270 412,128 0.3617010 1.566664 145 122
## 54 chr_1e 204226 270 263,277 0.4996639 1.998657 65 133
## count1.1 countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 13 1 0 509 31 0.9425926 0.05740741 0.1074074 62350
## 30 3 0 412 128 0.7629630 0.23703704 0.4518519 98473
## 54 72 0 263 277 0.4870370 0.51296296 0.4925926 204226
## Fis
## 13 0.007541669
## 30 -0.249241505
## 54 0.014152174
#hist(afreq$Fis)
# my_data$Fis <- (my_data$Hs - my_data$Ho)/my_data$Hs
nuccomp <- read.csv("../FigureSideo/EH23a.softmasked_nuccomp.csv")
#
nuccomp <- nuccomp[sort.int(nuccomp$Id, index.return = TRUE)$ix, ]
nuccomp$chrom_num <- sub(".+chr", "", nuccomp$Id)
nuccomp$chrom_num[ nuccomp$chrom_num == "X" ] <- 10
nuccomp$chrom_num <- as.numeric(nuccomp$chrom_num)
nuccomp[1:3, ]
## Id Length a A c C g G
## 1 EH23a.chr1 65878799 14682163 7271965 7482811 3467685 7489084 3471100
## 3 EH23a.chr2 78431500 21082855 4837213 11112037 2272064 11083165 2274791
## 4 EH23a.chr3 84599500 23084523 4686104 12369269 2189090 12364888 2196353
## t T w W s S m M k K r R y Y n N chrom_num
## 1 14719007 7290984 0 0 0 0 0 0 0 0 0 0 0 0 0 4000 1
## 3 20949851 4814024 0 0 0 0 0 0 0 0 0 0 0 0 0 5500 2
## 4 23003842 4701931 0 0 0 0 0 0 0 0 0 0 0 0 0 3500 3
nuccomp[, 1:2]
## Id Length
## 1 EH23a.chr1 65878799
## 3 EH23a.chr2 78431500
## 4 EH23a.chr3 84599500
## 5 EH23a.chr4 81926421
## 2 EH23a.chr5 78494000
## 9 EH23a.chr6 78694280
## 6 EH23a.chr7 65496500
## 7 EH23a.chr8 55717500
## 8 EH23a.chr9 66061500
## 10 EH23a.chrX 84640807
nuccomp$mids <- floor(nuccomp$Length/2)
nuccomp$Length2 <- 0
nuccomp$mids2 <- 0
nuccomp$Length2[1] <- nuccomp$Length[1]
nuccomp$mids2[1] <- nuccomp$mids[1]
for(i in 2:nrow(nuccomp)){
nuccomp$Length2[i] <- nuccomp$Length2[ i-1 ] + nuccomp$Length[ i ]
nuccomp$mids2[i] <- nuccomp$Length2[ i-1 ] + nuccomp$mids[i]
}
nuccomp$CHROM <- sub("^EH23a.", "", nuccomp$Id)
nuccomp$CHROM <- sub("chr", "chr_", nuccomp$CHROM)
nuccomp$CHROM <- paste(nuccomp$CHROM, "e", sep = "")
nuccomp[1:3, ]
## Id Length a A c C g G
## 1 EH23a.chr1 65878799 14682163 7271965 7482811 3467685 7489084 3471100
## 3 EH23a.chr2 78431500 21082855 4837213 11112037 2272064 11083165 2274791
## 4 EH23a.chr3 84599500 23084523 4686104 12369269 2189090 12364888 2196353
## t T w W s S m M k K r R y Y n N chrom_num mids Length2
## 1 14719007 7290984 0 0 0 0 0 0 0 0 0 0 0 0 0 4000 1 32939399 65878799
## 3 20949851 4814024 0 0 0 0 0 0 0 0 0 0 0 0 0 5500 2 39215750 144310299
## 4 23003842 4701931 0 0 0 0 0 0 0 0 0 0 0 0 0 3500 3 42299750 228909799
## mids2 CHROM
## 1 32939399 chr_1e
## 3 105094549 chr_2e
## 4 186610049 chr_3e
afreq$POS2 <- 0
for( i in 1:nrow(nuccomp) ){
# afreq$POS2[ afreq$CHROM == EH23a$Id[i] ] <- afreq$POS[ afreq$CHROM == EH23a$Id[i] ] + (EH23a$Length2[i] - EH23a$Length[i])
afreq$POS2[ afreq$CHROM == nuccomp$CHROM[i] ] <- afreq$POS[ afreq$CHROM == nuccomp$CHROM[i] ] + (nuccomp$Length2[i] - nuccomp$Length[i])
}
# EH23a$Id
# EH23a$Length2 - EH23a$Length
afreq[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1
## 13 chr_1e 62350 270 509,31 0.1082236 1.121357 240 29
## 30 chr_1e 98473 270 412,128 0.3617010 1.566664 145 122
## 54 chr_1e 204226 270 263,277 0.4996639 1.998657 65 133
## count1.1 countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 13 1 0 509 31 0.9425926 0.05740741 0.1074074 62350
## 30 3 0 412 128 0.7629630 0.23703704 0.4518519 98473
## 54 72 0 263 277 0.4870370 0.51296296 0.4925926 204226
## Fis
## 13 0.007541669
## 30 -0.249241505
## 54 0.014152174
EH23a <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
EH23a$Frequency <- EH23a$ERBfreq
EH23a$facet1 <- "EH23a"
EH23b <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
EH23b$Frequency <- EH23a$HO40freq
EH23b$facet1 <- "EH23b"
Het <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
Het$Frequency <- EH23a$He2
Het$facet1 <- "Het"
Het[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1
## 13 chr_1e 62350 270 509,31 0.1082236 1.121357 240 29
## 30 chr_1e 98473 270 412,128 0.3617010 1.566664 145 122
## 54 chr_1e 204226 270 263,277 0.4996639 1.998657 65 133
## count1.1 countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 13 1 0 509 31 0.9425926 0.05740741 0.1074074 62350
## 30 3 0 412 128 0.7629630 0.23703704 0.4518519 98473
## 54 72 0 263 277 0.4870370 0.51296296 0.4925926 204226
## Frequency facet1
## 13 0.1074074 Het
## 30 0.4518519 Het
## 54 0.4925926 Het
Fis_df <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
#Fis_df$Frequency <- Fis
Fis_df$Frequency <- afreq$Fis
Fis_df$facet1 <- "Fis"
#Fis_df[1:3, ]
#nrow(Fis_df)
# ncol(EH23a)
# ncol(EH23b)
# ncol(Het)
# ncol(Fis_df)
# Fis_df[1:3, ]
#
afreq1 <- rbind(EH23a, EH23b, Het)
#afreq <- rbind(EH23a, EH23b, Het, Fis_df)
#rbind(EH23a, EH23b, Fis)
afreq1$facet2 <- afreq1$facet1
afreq1$facet2 <- sub("EH23[ab]", "EH23", afreq1$facet2)
afreq1$facet2[ afreq1$facet2 == "EH23" ] <- "Allele"
afreq1$facet2[ afreq1$facet2 == "Het" ] <- "Heterozygosity"
table(afreq1$facet2)
##
## Allele Heterozygosity
## 70372 35186
afreq1[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1
## 13 chr_1e 62350 270 509,31 0.1082236 1.121357 240 29
## 30 chr_1e 98473 270 412,128 0.3617010 1.566664 145 122
## 54 chr_1e 204226 270 263,277 0.4996639 1.998657 65 133
## count1.1 countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 13 1 0 509 31 0.9425926 0.05740741 0.1074074 62350
## 30 3 0 412 128 0.7629630 0.23703704 0.4518519 98473
## 54 72 0 263 277 0.4870370 0.51296296 0.4925926 204226
## Frequency facet1 facet2
## 13 0.9425926 EH23a Allele
## 30 0.7629630 EH23a Allele
## 54 0.4870370 EH23a Allele
library(ggplot2)
#my_pal <- viridisLite::inferno(n = 10, begin = 0.2, end = 0.9, alpha = 0.01)
my_pal <- viridisLite::inferno(n = 10, begin = 0.2, end = 0.9, alpha = 0.04)
set.seed(99)
my_pal <- my_pal[sample(1:length(my_pal), size = length(my_pal))]
#palette(my_pal)
#as.numeric(as.factor(afreq$CHROM))
#afreq$col2 <- viridisLite::magma( n = 10, alpha = 0.8, begin = 0, end = 1 )[as.numeric(as.factor(afreq$CHROM))]
# range(afreq$POS2)
p <- ggplot( data = afreq1,
mapping = aes( x = POS2,
#y = ERBfreq,
#y = freq,
y = Frequency,
color = CHROM )
#color = col2)
)
p <- p + geom_point( shape = 20, show.legend = FALSE )
p <- p + theme_bw()
p <- p + theme( axis.title.x=element_blank(),
axis.text.x = element_text(angle = 60, hjust = 1)
# plot.margin = unit(c(0.1,0.1,1,0.1), "cm")
)
#p <- p + theme(axis.title.x = element_blank(),
# axis.text.x = element_text( angle = 60, hjust = 1))
# p + scale_x_discrete(breaks = EH23a$mids2, labels = EH23a$Id)
# p + scale_x_discrete(breaks = EH23a$mids2)
p <- p + ggplot2::scale_x_continuous(
breaks = nuccomp$mids2,
expand = expansion(mult = 0.01, add = 0.0),
# expand = expansion(mult = 0.02, add = 0.0),
labels = sub("a.chr", ".chr", nuccomp$Id)
)
p <- p + geom_vline( xintercept = nuccomp$Length2, linetype = 1, color = "#808080", linewidth = 0.4 )
p <- p + geom_vline( xintercept = 0, linetype = 1, color = "#808080", linewidth = 0.4 )
#p <- p + geom_hline( yintercept = 0.5, linetype = 1, color = "#000000", linewidth = 1 )
p <- p + geom_hline( yintercept = c(0, 0.5), linetype = 1, color = "#000000", linewidth = 1 )
#p + scale_color_manual( values = viridisLite::magma( n = 10, alpha = 0.01, begin = 0.2, end = 0.9 ) )
#p <- p + xlim(100, sum(nuccomp$Length))
#p <- p + ylim(0.3, 0.7)
p <- p + scale_color_manual( values = my_pal )
#
#p <-
p + facet_grid(facet1 ~ .)
#p <-
ahplot <- p + facet_grid(facet2 ~ . , scales = "free_y")
#p
ahplot
#p + scale_color_manual( values = afreq$col2 )
#+ scale_color_viridis_b()
library("ggpubr")
ggarrange(
plotlist = list(pEH23, ahplot),
labels = c("A", "B"),
ncol = 1, nrow = 2,
widths = 1, heights = c(2, 1))
Figure 1. Genomic architecture of the Cannabis sativa genome.
n <- 270
#my_data <- data.frame( p = seq(from = 0.5, to = 1.0, by = 0.1) )
my_data <- data.frame( p = seq(from = 0.5, to = 0.6, by = 0.1) )
my_data$q <- 1 - my_data$p
#
my_data$f <- 0
#my_data$f <- -0.25
#my_data$f <- 0.04
my_data$AA <- (my_data$p^2 * (1 - my_data$f)) + (my_data$p * my_data$f)
my_data$Aa <- 2 * my_data$p * my_data$q * (1 - my_data$f)
my_data$aa <- (my_data$q^2 * (1 - my_data$f)) + (my_data$q * my_data$f)
my_data$Hexcess <- my_data$Aa - (2 * my_data$p * my_data$q)
my_data$Hs <- 2 * my_data$p * my_data$q
my_data$Ho <- my_data$Aa
my_data$Fis <- (my_data$Hs - my_data$Ho)/my_data$Hs
my_data$nAA <- my_data$AA * n
my_data$nAa <- my_data$Aa * n
my_data$naa <- my_data$aa * n
my_data$nHexcess <- my_data$Hexcess * n
knitr::kable(my_data)
| p | q | f | AA | Aa | aa | Hexcess | Hs | Ho | Fis | nAA | nAa | naa | nHexcess |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.5 | 0.5 | 0 | 0.25 | 0.50 | 0.25 | 0 | 0.50 | 0.50 | 0 | 67.5 | 135.0 | 67.5 | 0 |
| 0.6 | 0.4 | 0 | 0.36 | 0.48 | 0.16 | 0 | 0.48 | 0.48 | 0 | 97.2 | 129.6 | 43.2 | 0 |
n <- 270
#my_data <- data.frame( p = seq(from = 0.5, to = 1.0, by = 0.1) )
my_data <- data.frame( p = seq(from = 0.5, to = 0.6, by = 0.1) )
my_data$q <- 1 - my_data$p
#
my_data$f <- -0.25
#my_data$f <- 0.04
my_data$AA <- (my_data$p^2 * (1 - my_data$f)) + (my_data$p * my_data$f)
my_data$Aa <- 2 * my_data$p * my_data$q * (1 - my_data$f)
my_data$aa <- (my_data$q^2 * (1 - my_data$f)) + (my_data$q * my_data$f)
my_data$Hexcess <- my_data$Aa - (2 * my_data$p * my_data$q)
my_data$Hs <- 2 * my_data$p * my_data$q
my_data$Ho <- my_data$Aa
my_data$Fis <- (my_data$Hs - my_data$Ho)/my_data$Hs
my_data$nAA <- my_data$AA * n
my_data$nAa <- my_data$Aa * n
my_data$naa <- my_data$aa * n
my_data$nHexcess <- my_data$Hexcess * n
knitr::kable(my_data)
| p | q | f | AA | Aa | aa | Hexcess | Hs | Ho | Fis | nAA | nAa | naa | nHexcess |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0.5 | 0.5 | -0.25 | 0.1875 | 0.625 | 0.1875 | 0.125 | 0.50 | 0.625 | -0.25 | 50.625 | 168.75 | 50.625 | 33.75 |
| 0.6 | 0.4 | -0.25 | 0.3000 | 0.600 | 0.1000 | 0.120 | 0.48 | 0.600 | -0.25 | 81.000 | 162.00 | 27.000 | 32.40 |
Filter Alleles.
afreq[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1
## 13 chr_1e 62350 270 509,31 0.1082236 1.121357 240 29
## 30 chr_1e 98473 270 412,128 0.3617010 1.566664 145 122
## 54 chr_1e 204226 270 263,277 0.4996639 1.998657 65 133
## count1.1 countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 13 1 0 509 31 0.9425926 0.05740741 0.1074074 62350
## 30 3 0 412 128 0.7629630 0.23703704 0.4518519 98473
## 54 72 0 263 277 0.4870370 0.51296296 0.4925926 204226
## Fis
## 13 0.007541669
## 30 -0.249241505
## 54 0.014152174
afreq <- afreq[afreq$ERBfreq >= 0.3, ]
afreq <- afreq[afreq$ERBfreq <= 0.7, ]
afreq <- afreq[afreq$HO40freq >= 0.3, ]
afreq <- afreq[afreq$HO40freq <= 0.7, ]
afreq <- afreq[afreq$He2 >= 0.25, ]
afreq <- afreq[afreq$He2 <= 0.75, ]
EH23a <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
EH23a$Frequency <- EH23a$ERBfreq
EH23a$facet1 <- "EH23a"
EH23b <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
EH23b$Frequency <- EH23a$HO40freq
EH23b$facet1 <- "EH23b"
Het <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
Het$Frequency <- EH23a$He2
Het$facet1 <- "Het"
Het[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1
## 54 chr_1e 204226 270 263,277 0.4996639 1.998657 65 133
## 59 chr_1e 222275 270 270,270 0.5000000 2.000000 63 144
## 68 chr_1e 249752 270 263,277 0.4996639 1.998657 61 141
## count1.1 countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 54 72 0 263 277 0.487037 0.512963 0.4925926 204226
## 59 63 0 270 270 0.500000 0.500000 0.5333333 222275
## 68 68 0 263 277 0.487037 0.512963 0.5222222 249752
## Frequency facet1
## 54 0.4925926 Het
## 59 0.5333333 Het
## 68 0.5222222 Het
Fis_df <- afreq[, c("CHROM", "POS", "n", "Allele_counts", "He", "Ne", "count0.0",
"count0.1", "count1.1", "countNA", "ERBcount", "HO40count", "ERBfreq",
"HO40freq", "He2", "POS2")]
#Fis_df$Frequency <- Fis
Fis_df$Frequency <- afreq$Fis
Fis_df$facet1 <- "Fis"
#Fis_df[1:3, ]
#nrow(Fis_df)
# ncol(EH23a)
# ncol(EH23b)
# ncol(Het)
# ncol(Fis_df)
# Fis_df[1:3, ]
#afreq1 <- rbind(EH23a, EH23b, Het)
#
#
afreq <- rbind(EH23a, EH23b, Het, Fis_df)
afreq$facet2 <- afreq$facet1
afreq$facet2 <- sub("EH23[ab]", "EH23", afreq$facet2)
afreq$facet2[ afreq$facet2 == "EH23" ] <- "Allele"
afreq$facet2[ afreq$facet2 == "Het" ] <- "Heterozygosity"
table(afreq$facet2)
##
## Allele Fis Heterozygosity
## 45618 22809 22809
afreq[1:3, ]
## CHROM POS n Allele_counts He Ne count0.0 count0.1
## 54 chr_1e 204226 270 263,277 0.4996639 1.998657 65 133
## 59 chr_1e 222275 270 270,270 0.5000000 2.000000 63 144
## 68 chr_1e 249752 270 263,277 0.4996639 1.998657 61 141
## count1.1 countNA ERBcount HO40count ERBfreq HO40freq He2 POS2
## 54 72 0 263 277 0.487037 0.512963 0.4925926 204226
## 59 63 0 270 270 0.500000 0.500000 0.5333333 222275
## 68 68 0 263 277 0.487037 0.512963 0.5222222 249752
## Frequency facet1 facet2
## 54 0.487037 EH23a Allele
## 59 0.500000 EH23a Allele
## 68 0.487037 EH23a Allele
library(ggplot2)
table(afreq$facet2)
##
## Allele Fis Heterozygosity
## 45618 22809 22809
afreq <- afreq[afreq$facet2 != "Heterozygosity", ]
#my_pal <- viridisLite::inferno(n = 10, begin = 0.2, end = 0.9, alpha = 0.01)
my_pal <- viridisLite::inferno(n = 10, begin = 0.2, end = 0.9, alpha = 0.04)
set.seed(99)
my_pal <- my_pal[sample(1:length(my_pal), size = length(my_pal))]
#palette(my_pal)
#as.numeric(as.factor(afreq$CHROM))
#afreq$col2 <- viridisLite::magma( n = 10, alpha = 0.8, begin = 0, end = 1 )[as.numeric(as.factor(afreq$CHROM))]
# range(afreq$POS2)
p <- ggplot( data = afreq,
mapping = aes( x = POS2,
#y = ERBfreq,
#y = freq,
y = Frequency,
color = CHROM )
#color = col2)
)
p <- p + geom_point( shape = 20, show.legend = FALSE )
p <- p + theme_bw()
p <- p + theme( axis.title.x=element_blank(),
axis.text.x = element_text(angle = 60, hjust = 1)
# plot.margin = unit(c(0.1,0.1,1,0.1), "cm")
)
#p <- p + theme(axis.title.x = element_blank(),
# axis.text.x = element_text( angle = 60, hjust = 1))
# p + scale_x_discrete(breaks = EH23a$mids2, labels = EH23a$Id)
# p + scale_x_discrete(breaks = EH23a$mids2)
p <- p + ggplot2::scale_x_continuous(
breaks = nuccomp$mids2,
expand = expansion(mult = 0.01, add = 0.0),
# expand = expansion(mult = 0.02, add = 0.0),
labels = sub("a.chr", ".chr", nuccomp$Id)
)
p <- p + theme( panel.grid.major.x = element_line(color = "#A9A9A9", linewidth = 0.0 ) )
p <- p + theme( panel.grid.minor.x = element_line(color = "#A9A9A9", linewidth = 0.0 ) )
# p <- p + geom_vline( xintercept = nuccomp$Length2, linetype = 1, color = "#808080", linewidth = 0.4 )
# p <- p + geom_vline( xintercept = 0, linetype = 1, color = "#808080", linewidth = 0.4 )
p <- p + geom_vline( xintercept = nuccomp$Length2, linetype = 5, color = "#808080", linewidth = 0.4 )
p <- p + geom_vline( xintercept = 0, linetype = 5, color = "#808080", linewidth = 0.4 )
#p <- p + geom_hline( yintercept = 0.5, linetype = 1, color = "#000000", linewidth = 1 )
#p <- p + geom_hline( yintercept = c(0, 0.5), linetype = 1, color = "#000000", linewidth = 1 )
#p + scale_color_manual( values = viridisLite::magma( n = 10, alpha = 0.01, begin = 0.2, end = 0.9 ) )
#p <- p + xlim(100, sum(nuccomp$Length))
#p <- p + ylim(0.3, 0.7)
p <- p + scale_color_manual( values = my_pal )
#
#p <- p + facet_grid(facet1 ~ .)
#p <-
#ahplot + ylab("")
ahplot <- ahplot + ylab(NULL)
ahplot <- ahplot + scale_y_continuous( breaks = seq(-1.0, 1.0, by = 0.2),
minor_breaks = seq(-1, 1, by = 0.1) )
ahplot <- p + facet_grid(facet2 ~ . , scales = "free_y")
#ahplot <- ahplot + theme( panel.grid.major.y = element_line(color = "#696969", linewidth = 0.6 ) )
ahplot <- ahplot + theme( panel.grid.major.y = element_line(color = "#A9A9A9", linewidth = 0.4 ) )
#ahplot + theme( panel.grid.major.y = element_line(color = "#C0C0C0", linewidth = 0.2, linetype = 1 ) )
# myPlots[[i]] <- myPlots[[i]] + theme( panel.grid.minor.y=element_line(color = "#C0C0C0", size=0.2) )
#p
ahplot
#hline_dat <- data.frame( threshold = c(0.5, 0.0) )
#ahplot + geom_hline(data=hline_dat, aes(yintercept=threshold), colour="salmon")
#p + scale_color_manual( values = afreq$col2 )
#+ scale_color_viridis_b()
library("ggpubr")
ggarrange(
plotlist = list(pEH23, ahplot),
labels = c("A", "B"),
ncol = 1, nrow = 2,
widths = 1, heights = c(2, 1))
Figure 1. Genomic architecture of the Cannabis sativa genome.
t99 <- Sys.time()
t99 - t1
## Time difference of 3.921986 mins